import pandas as pd
from config import shanlian_config
import warnings
from config import  shanlian_config
from config.excel_config import column_name
warnings.filterwarnings('ignore')
from config.excel_config import shanlian_column_types
from utils import clean_utils
from clean_data.common.common import get_site_dict

# 产品名称B -> 配送商备案产品I
# 生产企业名称D -> 生产厂家J2
# 购货单位名称F -> 医院名称L2
# 销售日期O -> 出库时间C
# 销售数量T -> 数量U2
# 批号AN -> 批次信息AD2
# 单价BZ -> 单价V2
def clean_data():
    df = pd.read_excel(shanlian_config.selenium_excel_file_url, dtype=shanlian_column_types)
    condition = ((df['销售日期'] >= shanlian_config.query_start_time) & (df['销售日期'] <= shanlian_config.query_end_time))
    df = df[condition]
    # condition2 = ((df['产品名称'] == '血气测定试剂盒（电极法）Measurement Cartridge')
    #               & (df['购货单位名称'] == '国药控股广西有限公司'))
    # # ~ 取反
    # df = df[~condition2]

    # 加上id
    if len(df.index) != 0:
        # df['ID'] = shanlian_config.id_prefix + df.index.map(lambda x: f'{x + 1:03d}')
        df['ID'] = clean_utils.get_ids(shanlian_config.id_prefix, df['销售日期'].tolist())
    else:
        df['ID'] = []
    # 加上数据源
    df['数据来源'] = shanlian_config.data_source

    #补充医院编码
    site_dict = get_site_dict()
    yymc_code = []
    exception_msg = []
    for yymc in df['购货单位名称']:
        if yymc in site_dict:
            yymc_code.append(site_dict[yymc])
            exception_msg.append('')
        else:
            yymc_code.append('')
            exception_msg.append(f'站点信息备案表 没有站点 {yymc}')

    data = {
        column_name['ID']: df['ID'],
        column_name['CKSJ']: df['销售日期'],
        column_name['PSS']: df['货主'],
        column_name['CPMC_PSS']: df['产品名称'],
        column_name['CPMC_GSNB']: df['产品名称'],
        column_name['CPBM_PSS']: df['商品编码'],
        column_name['CPBM_GSNB']: df['商品编码'],
        column_name['SCCJ']: df['生产企业名称'],
        column_name['KHMC_PSS']: df['购货单位名称'],
        column_name['YYMC_GSNB']: df['购货单位名称'],
        column_name['KHBM']: df['客户编码'],
        column_name['YYBM']: yymc_code,
        column_name['SL']: df['销售数量'],
        column_name['DJ']: df['单价'],
        column_name['PCXX']: df['批号'],
        column_name['LHXX']: ['深圳闪链医疗供应链管理有限公司'] * len(yymc_code),
        column_name['SJLY']: df['数据来源'],
        column_name['YC']:  exception_msg
    }

    df = pd.DataFrame(data)

    df.to_excel(shanlian_config.excel_file_url, index=False)


if __name__ == '__main__':
    clean_data()
